library(fpp)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1
##
##
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
##
## ausair, ausbeer, austa, austourists, debitcards, departures,
## elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(TTR)
library(readr)
air <- read_csv("~/Downloads/airtraffic.csv")
## Rows: 249 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): Year, Month, Dom_LF, Int_LF, LF
## num (12): Dom_Pax, Int_Pax, Pax, Dom_Flt, Int_Flt, Flt, Dom_RPM, Int_RPM, RP...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Air_Raw <- air$Flt
Air_ts <- ts(Air_Raw, frequency = 12, start = c(2003,1))
plot(Air_ts)

Air_ts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2003 842827 741610 856120 821265 844662 856576 894576 894497 835821 872580
## 2004 845530 817440 895125 878354 895946 901172 940939 951311 875835 919950
## 2005 872141 820306 935395 904552 941186 933236 961902 964102 876747 892282
## 2006 851600 776504 894057 862927 892727 891734 928415 941286 862487 891323
## 2007 873558 787368 902740 879700 914511 904960 941191 949167 869349 905715
## 2008 864080 813081 891960 870504 891901 888697 921981 898759 788085 819535
## 2009 776904 721118 817127 795925 812825 826477 865158 851337 767489 784846
## 2010 761435 682832 809631 788620 809605 821753 858562 854246 779011 799678
## 2011 749578 687145 826594 788736 811458 826578 860116 839522 768040 786043
## 2012 745717 711905 801999 775919 795525 807244 833647 827637 740518 757502
## 2013 730667 670514 783938 762993 792128 792262 828902 822246 745390 772968
## 2014 693436 640660 772440 747917 768849 778423 815881 800478 727898 757294
## 2015 704627 632620 756818 740629 761713 771398 811289 799989 720759 751372
## 2016 707237 678081 771996 746635 775065 788704 809770 806894 735933 751607
## 2017 716411 655855 770634 745517 779642 795324 820949 810895 712042 756360
## 2018 723111 663958 773066 767348 797342 809686 841499 833431 751454 789191
## 2019 739803 673087 806233 781801 815270 820663 851326 848650 769351 805988
## 2020 770528 724758 667244 222280 257446 280256 427918 459910 397791 433913
## 2021 449262 396590 541632 555249 610401 666566 717100 712664 654042 677379
## 2022 628054 586629 691979 679597 709698 708118 739118 726120 679388 697047
## 2023 672803 628345 726912 706238 739255 736572 764677 768619 713549
## Nov Dec
## 2003 819659 855970
## 2004 878360 899701
## 2005 856725 867307
## 2006 854346 874036
## 2007 868871 874306
## 2008 767933 785484
## 2009 753473 769641
## 2010 765076 768595
## 2011 743438 769169
## 2012 731077 738651
## 2013 719062 731424
## 2014 713691 737455
## 2015 714668 730730
## 2016 719350 734434
## 2017 719555 738657
## 2018 742649 764065
## 2019 758402 793144
## 2020 450701 467620
## 2021 668512 665686
## 2022 664820 663948
## 2023
#Shows the number of flights monthly from 2003 to 2023. Sharp drop in 2020 due to COVID.
air_win <- window(Air_ts, start = c(2021,6))
#Will use a window starting from June 2021, since that is where the air traffic returned to normal from COVID.
plot(air_win)

Acf(air_win)

#Acf shows no strong trend in the data.
summary(air_win)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 586629 666346 694513 692623 719355 768619
boxplot(air_win)

#Average number of flights in the US monthly is 692623. The maximum amount in any month is 768619.
decompose <- decompose(air_win)
plot(decompose)

decompose
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2021 666566 717100 712664 654042 677379
## 2022 628054 586629 691979 679597 709698 708118 739118 726120 679388 697047
## 2023 672803 628345 726912 706238 739255 736572 764677 768619 713549
## Nov Dec
## 2021 668512 665686
## 2022 664820 663948
## 2023
##
## $seasonal
## Jan Feb Mar Apr May Jun
## 2021 27610.0747
## 2022 -36456.1962 -81554.6128 17998.4288 -471.5087 28963.8247 27610.0747
## 2023 -36456.1962 -81554.6128 17998.4288 -471.5087 28963.8247 27610.0747
## Jul Aug Sep Oct Nov Dec
## 2021 56817.9497 40217.2413 -9708.4670 5384.9497 -29183.6337 -19618.0503
## 2022 56817.9497 40217.2413 -9708.4670 5384.9497 -29183.6337 -19618.0503
## 2023 56817.9497 40217.2413 -9708.4670
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug
## 2021 NA NA NA
## 2022 675872.2 677350.3 678967.1 680842.7 681508.3 681282.1 683074.2 686676.9
## 2023 699445.5 702281.2 705475.4 NA NA NA NA NA
## Sep Oct Nov Dec
## 2021 NA NA NA 673223.5
## 2022 689870.6 692436.2 694777.8 697194.9
## 2023 NA
##
## $random
## Jan Feb Mar Apr May Jun
## 2021 NA
## 2022 -11362.054 -9166.720 -4986.512 -774.158 -774.158 -774.158
## 2023 9813.738 7618.405 3438.196 NA NA NA
## Jul Aug Sep Oct Nov Dec
## 2021 NA NA NA NA NA 12080.550
## 2022 -774.158 -774.158 -774.158 -774.158 -774.158 -13628.866
## 2023 NA NA NA
##
## $figure
## [1] 27610.0747 56817.9497 40217.2413 -9708.4670 5384.9497 -29183.6337
## [7] -19618.0503 -36456.1962 -81554.6128 17998.4288 -471.5087 28963.8247
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
#Time series is additive.
#The number of flights is lowest in November, December, January, and February. It is highest in May, June, July, and August. Therefore, this data is seasonal. The number of flights is higher in the summer months because the weather is warmer and people tend to take off work and travel. The total flights are lower in the winter months since the weather is cold and there are also a lot of holidays. People tend to stay home with their family during the holidays.
plot(air_win)
seasonadj_air <- seasadj(decompose)
lines(seasonadj_air, col = "blue")

#There are some large fluctuations between the time series and the seasonally adjusted plot. This means the seasonal fluctuations are very large in the data.
#Naive Method
naive_forecast <- naive(air_win, 12)
plot(naive_forecast)

res_air <- naive_forecast$residuals
plot(res_air)

#Naive forecast predicts a constant number for the next year.
#Plot of residuals shows large fluctuating errors, but no trend over time.
hist(res_air)

#Most of the residuals occur between the values of -50,000 and 50,000. There a lot of large errors for the naive model.
plot(as.numeric(naive_forecast$fitted), as.numeric(res_air))

plot(as.numeric(naive_forecast$x), as.numeric(res_air))

#Both the fitted and actual forecast do not have any correlation with the residuals.
Acf(res_air)

#No trend in reisudals as there are only two lines above the line of significance at lag 6 and 12.
accuracy(naive_forecast)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1740.111 40245.58 30146.33 0.07588592 4.384556 1.10695 -0.343988
#MAPE is 4.3846 which is high.
plot(naive_forecast)

naive_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2023 713549 661972.2 765125.8 634669.1 792428.9
## Nov 2023 713549 640608.4 786489.6 601996.0 825102.0
## Dec 2023 713549 624215.4 802882.6 576925.0 850173.0
## Jan 2024 713549 610395.4 816702.6 555789.2 871308.8
## Feb 2024 713549 598219.8 828878.2 537168.2 889929.8
## Mar 2024 713549 587212.2 839885.8 520333.5 906764.5
## Apr 2024 713549 577089.6 850008.4 504852.4 922245.6
## May 2024 713549 567667.8 859430.2 490443.0 936655.0
## Jun 2024 713549 558818.6 868279.4 476909.3 950188.7
## Jul 2024 713549 550448.9 876649.1 464108.9 962989.1
## Aug 2024 713549 542488.1 884609.9 451934.0 975164.0
## Sep 2024 713549 534881.8 892216.2 440301.0 986797.0
#Overall, not the best. Predicts 713549 flights for every month of the next year.
#Simple Smoothing
ets_air <- ets(air_win)
ets_forecast <- forecast(ets_air, h=12)
plot(ets_forecast)

ets_air
## ETS(A,N,N)
##
## Call:
## ets(y = air_win)
##
## Smoothing parameters:
## alpha = 0.5897
##
## Initial states:
## l = 680302.5868
##
## sigma: 38093.85
##
## AIC AICc BIC
## 687.9040 688.9040 691.9006
#Forecasts a straight line for the next 12 months.
res_ets <- ets_forecast$residuals
plot(res_ets)

#Residuals do not show trend with time.
hist(res_ets)

#Most errors are around 0, but a lot are also around -50,000.
plot(as.numeric(ets_forecast$fitted), as.numeric(res_ets))

plot(as.numeric(ets_forecast$x), as.numeric(res_ets))

#Residuals points are scattered in both plots. No visible trend between the fitted/actuals plots and the residuals.
Acf(res_ets)

#Only significant lines at lag 6 and 12. No trend in residuals.
accuracy(ets_forecast)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 3201.761 36708.16 29211.68 0.24674 4.256593 1.072631 -0.02053556
plot(ets_forecast)

ets_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2023 733171 684351.8 781990.3 658508.4 807833.6
## Nov 2023 733171 676494.9 789847.1 646492.4 819849.6
## Dec 2023 733171 669601.9 796740.2 635950.4 830391.7
## Jan 2024 733171 663386.4 802955.7 626444.6 839897.4
## Feb 2024 733171 657680.9 808661.1 617718.9 848623.2
## Mar 2024 733171 652377.4 813964.7 609607.8 856734.3
## Apr 2024 733171 647401.1 818940.9 601997.3 864344.8
## May 2024 733171 642698.2 823643.9 594804.7 871537.3
## Jun 2024 733171 638227.9 828114.2 587968.0 878374.0
## Jul 2024 733171 633958.8 832383.2 581439.1 884903.0
## Aug 2024 733171 629866.1 836476.0 575179.7 891162.4
## Sep 2024 733171 625929.3 840412.7 569159.0 897183.0
#HoltWinters
HW_air <- HoltWinters(air_win)
HW_forecast <- forecast(HW_air, h=12)
plot(HW_forecast)

HW_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2023 733816.8 718857.0 748776.7 710937.7 756696.0
## Nov 2023 702511.5 687355.8 717667.3 679332.9 725690.2
## Dec 2023 728195.1 712758.3 743632.0 704586.5 751803.7
## Jan 2024 691177.7 675362.9 706992.4 666991.0 715364.3
## Feb 2024 651537.9 635239.4 667836.3 626611.6 676464.2
## Mar 2024 758534.4 741640.7 775428.2 732697.6 784371.2
## Apr 2024 747540.1 729936.3 765144.0 720617.3 774462.9
## May 2024 780238.7 761809.5 798668.0 752053.6 808423.9
## Jun 2024 782148.3 762779.9 801516.6 752526.9 811769.6
## Jul 2024 814619.4 794201.5 835037.4 783392.9 845846.0
## Aug 2024 801282.0 779708.4 822855.6 768288.1 834276.0
## Sep 2024 754619.6 731789.2 777450.0 719703.5 789535.7
res_HW <- HW_forecast$residuals
plot(res_HW)

#Some large errors around 2023.
hist(res_HW)

#Most errors are concentrated between -10,000 and 10,000.
plot(as.numeric(HW_forecast$fitted), as.numeric(res_HW))

plot(as.numeric(HW_forecast$x), as.numeric(res_HW))

#No correlation between the residuals and fitted/actual forecasts. Generally concentrated around 0 except for a few outliers.
Acf(res_HW)

#No trend in residuals.
accuracy(HW_forecast)
## ME RMSE MAE MPE MAPE MASE
## Training set 2687.366 11617.68 9200.157 0.4072509 1.329879 0.3378227
## ACF1
## Training set 0.008274902
#Very low MAPE at 1.33. RMSE is also 11617.68, which is much lower than the other forecasts so far.
plot(HW_forecast)

HW_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2023 733816.8 718857.0 748776.7 710937.7 756696.0
## Nov 2023 702511.5 687355.8 717667.3 679332.9 725690.2
## Dec 2023 728195.1 712758.3 743632.0 704586.5 751803.7
## Jan 2024 691177.7 675362.9 706992.4 666991.0 715364.3
## Feb 2024 651537.9 635239.4 667836.3 626611.6 676464.2
## Mar 2024 758534.4 741640.7 775428.2 732697.6 784371.2
## Apr 2024 747540.1 729936.3 765144.0 720617.3 774462.9
## May 2024 780238.7 761809.5 798668.0 752053.6 808423.9
## Jun 2024 782148.3 762779.9 801516.6 752526.9 811769.6
## Jul 2024 814619.4 794201.5 835037.4 783392.9 845846.0
## Aug 2024 801282.0 779708.4 822855.6 768288.1 834276.0
## Sep 2024 754619.6 731789.2 777450.0 719703.5 789535.7
#Forecast follows the seasonal trends of air traffic in the US. Definitely the best so far.
#ARIMA
Pacf(air_win)

#Must difference because Pacf has one significiant point at lag 1 and immediately drops off.
ndiffs(air_win)
## [1] 1
#There is 1 difference needed to make it stationary.
tsdisplay(air_win)

air_windiff1 <- diff(air_win, differences=1)
plot(air_windiff1)

tsdisplay(air_windiff1)

auto_fit <- auto.arima(air_win, trace=TRUE, stepwise = FALSE)
##
## ARIMA(0,0,0)(0,1,0)[12] : 377.6012
## ARIMA(0,0,0)(0,1,0)[12] with drift : 355.8378
## ARIMA(0,0,1)(0,1,0)[12] : 367.228
## ARIMA(0,0,1)(0,1,0)[12] with drift : 354.3083
## ARIMA(0,0,2)(0,1,0)[12] : 365.2007
## ARIMA(0,0,2)(0,1,0)[12] with drift : 357.7845
## ARIMA(0,0,3)(0,1,0)[12] : 368.626
## ARIMA(0,0,3)(0,1,0)[12] with drift : 360.7292
## ARIMA(0,0,4)(0,1,0)[12] : Inf
## ARIMA(0,0,4)(0,1,0)[12] with drift : Inf
## ARIMA(0,0,5)(0,1,0)[12] : Inf
## ARIMA(0,0,5)(0,1,0)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,0)[12] : 360.7631
## ARIMA(1,0,0)(0,1,0)[12] with drift : 356.8136
## ARIMA(1,0,1)(0,1,0)[12] : 363.482
## ARIMA(1,0,1)(0,1,0)[12] with drift : 357.8571
## ARIMA(1,0,2)(0,1,0)[12] : Inf
## ARIMA(1,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(1,0,3)(0,1,0)[12] : Inf
## ARIMA(1,0,3)(0,1,0)[12] with drift : Inf
## ARIMA(1,0,4)(0,1,0)[12] : Inf
## ARIMA(1,0,4)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,0)[12] : 363.8195
## ARIMA(2,0,0)(0,1,0)[12] with drift : 357.7598
## ARIMA(2,0,1)(0,1,0)[12] : Inf
## ARIMA(2,0,1)(0,1,0)[12] with drift : 361.5047
## ARIMA(2,0,2)(0,1,0)[12] : Inf
## ARIMA(2,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(2,0,3)(0,1,0)[12] : Inf
## ARIMA(2,0,3)(0,1,0)[12] with drift : Inf
## ARIMA(3,0,0)(0,1,0)[12] : 363.2131
## ARIMA(3,0,0)(0,1,0)[12] with drift : 361.4417
## ARIMA(3,0,1)(0,1,0)[12] : 367.4622
## ARIMA(3,0,1)(0,1,0)[12] with drift : 366.7085
## ARIMA(3,0,2)(0,1,0)[12] : Inf
## ARIMA(3,0,2)(0,1,0)[12] with drift : Inf
## ARIMA(4,0,0)(0,1,0)[12] : 367.4735
## ARIMA(4,0,0)(0,1,0)[12] with drift : 366.5294
## ARIMA(4,0,1)(0,1,0)[12] : 372.7396
## ARIMA(4,0,1)(0,1,0)[12] with drift : Inf
## ARIMA(5,0,0)(0,1,0)[12] : Inf
## ARIMA(5,0,0)(0,1,0)[12] with drift : 371.9212
##
##
##
## Best model: ARIMA(0,0,1)(0,1,0)[12] with drift
Acf(air_windiff1)

Pacf(air_windiff1)

#Best ARIMA model is ARIMA(0,0,1)(0,1,0)[12] with drift. Drift indicates a slight increasing trend of total flights. The model says the time series is seasonal and there is one seasonal difference being used.
auto_fit
## Series: air_win
## ARIMA(0,0,1)(0,1,0)[12] with drift
##
## Coefficients:
## ma1 drift
## 0.6440 2244.9556
## s.e. 0.2134 398.8647
##
## sigma^2 = 162913382: log likelihood = -173.15
## AIC=352.31 AICc=354.31 BIC=354.63
attributes(auto_fit)
## $names
## [1] "coef" "sigma2" "var.coef" "mask" "loglik" "aic"
## [7] "arma" "residuals" "call" "series" "code" "n.cond"
## [13] "nobs" "model" "xreg" "bic" "aicc" "x"
## [19] "fitted"
##
## $class
## [1] "forecast_ARIMA" "ARIMA" "Arima"
plot(forecast(auto_fit,h=5,level=c(80)))

#This model has the lowest BIC of 354.63.
#Shows there is an 80% chance the actual value lies in the forecast range.
res_arima <- auto_fit$residuals
plot(res_arima)

Acf(res_arima)

hist(res_arima)

plot(as.numeric(auto_fit$fitted), as.numeric(res_arima))

plot(as.numeric(auto_fit$x), as.numeric(res_arima))

#Most errors are between 0 and 10,000 which are smaller errors for this data.
#No trend in residuals as shown in ACF.
#Most residuals are concentrated around 0 for both actual and fitted forecasts, with the exception of a few outliers.
accuracy(auto_fit)
## ME RMSE MAE MPE MAPE MASE
## Training set 94.89433 9025.336 5640.899 0.003476053 0.8080238 0.2071295
## ACF1
## Training set -0.04472245
arima_forecast <- forecast(auto_fit, h=12)
plot(arima_forecast)

arima_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2023 722290.8 705933.4 738648.2 697274.3 747307.3
## Nov 2023 691759.5 672303.9 711215.0 662004.7 721514.2
## Dec 2023 690887.5 671431.9 710343.0 661132.7 720642.2
## Jan 2024 699742.5 680286.9 719198.0 669987.7 729497.2
## Feb 2024 655284.5 635828.9 674740.0 625529.7 685039.2
## Mar 2024 753851.5 734395.9 773307.0 724096.7 783606.2
## Apr 2024 733177.5 713721.9 752633.0 703422.7 762932.2
## May 2024 766194.5 746738.9 785650.0 736439.7 795949.2
## Jun 2024 763511.5 744055.9 782967.0 733756.7 793266.2
## Jul 2024 791616.5 772160.9 811072.0 761861.7 821371.2
## Aug 2024 795558.5 776102.9 815014.0 765803.7 825313.2
## Sep 2024 740488.5 721032.9 759944.0 710733.7 770243.2
#Forecast follows the seasonal trends of the past while expecting the total flights to slightly increase with time (drift).
#MAPE is 0.8080238, which is the lowest out of all the forecasts. RMSE is 9025.336 which is also the lowest of all the forecasts.
#ARIMA looks like the best forecast.
#Conclusion
#The ARIMA(0,0,1)(0,1,0)[12] with drift model is the best forecast to use to predict the total flights in the US over the next year. The ARIMA follows the seasonal trend of an increase in flights in the summer months, and a sharp decrease in the winter months. It accounts for an expected increase in total flights, due to more planes being built and more people wanting to fly places over time.
#These seasonal fluctuations occur because people tend to go on vacations and travel in the summer when the weather is warm. Kids are out of school in the summer so it gives families an opportunity to travel places. In the winter, the weather is cold and most people are working or in school. Also, there are many holidays in the fall/winter, so people tend to stay home and be with their family.
#The ARIMA model had the lowest MAPE at 0.8080238, and the lowest RMSE 9025.336, indicating it had the smallest errors of all the forecast. There were no trend in the residuals, and when plotting the forecast vs the residuals, the errors were all concentrated around 0. The histogram of the residuals showed the vast majority of the errors to be within in -10,000 and 10,000 which is small for this data set. These accuracy measures all contributed to choosing ARIMA as the best model for predicting the total flights in the US over the next year.
#Insights
#Airline companies should have more planes and pilots on staff for the summer months, as there will be an increased amount of flights being booked during this time. Airports can also prepare for this by having more air traffic control workers during the summer, as well as more restaurants open for the larger crowds. In the winter, airline companies can limit the amount of planes being flown to limit mileage and fuel costs.